1

# 1

# Load the first subject file found in the ROI directory and print its region
# volumes together with an `icv` column (presumably intracranial volume).
# Needs `%>%`, mutate() and select() (dplyr) to be loaded already, as well as
# readSubject(), which is not defined in this file (presumably from the
# MRIcloudT1volumetrics package -- TODO confirm).
roiDir <- "C:/Users/wilson/Desktop/data/"
fileList <- dir(roiDir)
# Fail early on an empty or missing directory: fileList[1] would otherwise be
# NA and the path handed to readSubject() would end in "NA".
if (length(fileList) == 0) {
  stop("No files found in ", roiDir, call. = FALSE)
}
# roiDir already ends in "/", so plain concatenation yields a valid path.
fullPath <- paste0(roiDir, fileList[1])
dat <- readSubject(fullPath)
# Only the first element of the object returned by readSubject() is used.
sub1 <- as.data.frame(dat[[1]])
sub1 %>%
  # Drop rawid and add icv: the sum of volume over every row whose roi is not
  # "CSF" (the logical mask multiplies CSF volumes by 0). The same total is
  # repeated on every row.
  mutate(rawid = NULL, icv = sum(volume * (roi != "CSF"))) %>%
  # Remove the per-region summary statistics from the printed table.
  select(-min, -max, -mean, -std)
##               roi volume type level     icv
## 1 Telencephalon_L 531111    1     1 1268519
## 2 Telencephalon_R 543404    1     1 1268519
## 3  Diencephalon_L   9683    1     1 1268519
## 4  Diencephalon_R   9678    1     1 1268519
## 5   Mesencephalon  10268    1     1 1268519
## 6   Metencephalon 159402    1     1 1268519
## 7  Myelencephalon   4973    1     1 1268519
## 8             CSF 109776    1     1 1268519

2

# 2

# Read the class-interest survey and draw three interactive bar charts:
# counts by year, counts by program, and counts by year split by program.
# Needs ggplot2 and plotly (ggplotly) to be loaded already.
dat <- read.table("C:/Users/wilson/Desktop/classInterests.txt", header = TRUE)
# Fix the display order of Year; any value not listed here becomes NA.
dat$Year <- factor(
  dat$Year,
  levels = c("Sophomore", "Junior", "Senior", "Master's", "PhD")
)
p1 <- ggplot(data = dat, aes(x = Year)) +
  geom_bar() +
  labs(title = "Bar plot of students' year", x = "Year", y = "Count")
p2 <- ggplot(data = dat, aes(x = Program)) +
  geom_bar() +
  labs(title = "Bar plot of students' program", x = "Program", y = "Count")
# Stacked bars: one bar per year, filled by program.
p3 <- ggplot(data = dat, aes(x = Year, fill = Program)) +
  geom_bar() +
  theme_minimal() +
  labs(
    title = "Bar plot of students' year and program",
    x = "Year",
    y = "Count"
  )
# Convert each static plot to an interactive plotly widget and display it.
ggplotly(p1)
p2i <- ggplotly(p2)
p2i
p3i <- ggplotly(p3)
p3i

3

# 3

# Interactive mosaic plot of Year against Program for the class data.
# `dat` is the class-survey data frame created earlier in the script.
# Needs ggplot2, ggmosaic (geom_mosaic, product) and plotly to be loaded.
p4 <- ggplot(data = dat) +
  geom_mosaic(mapping = aes(x = product(Year, Program), fill = Year)) +
  ggtitle("Mosaic plot of the class data for year and program") +
  xlab("Program") +
  ylab("Year") +
  theme(
    # The y-axis title and tick labels are drawn in white (presumably to hide
    # them against the panel background -- the fill legend already shows Year).
    axis.title.y = element_text(vjust = -20, colour = "white"),
    axis.text.y = element_text(colour = "white"),
    # Rotate the program names so they do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
p4i <- ggplotly(p4)
p4i

4

# 4
# Read the expression table and subtract, from every entry, the mean of its
# row and the mean of its column.
# Needs readr (read_csv) and dplyr (`%>%`, select) to be loaded already.
dat4 <- read_csv("GSE5859_exprs.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character()
## )
## See spec(...) for full column specifications.
# The first column is the unnamed character column of row identifiers. Drop it
# by position: the name readr invents for an unnamed column depends on the
# readr version ('X1' in the run recorded above), so selecting it by name is
# fragile.
dt4 <- dat4 %>% select(-1)
# Row means, recycled down the columns so that every column of rmm equals rmv.
rmv <- rowMeans(dt4)
rmm <- matrix(rmv, nrow = nrow(dt4), ncol = ncol(dt4))
# Column means, filled row-wise so that every row of cmm equals cmv.
cmv <- colMeans(dt4)
cmm <- matrix(cmv, nrow = nrow(dt4), ncol = ncol(dt4), byrow = TRUE)
# Both sets of means are computed from the original values and both are
# subtracted, so the overall mean is removed twice and the results are centred
# around minus the overall mean rather than around zero.
# NOTE(review): if classical double-centring is intended, add mean(rmv) back --
# confirm against the task statement.
dt4 <- dt4 - rmm - cmm
# Re-attach the identifier column and preview the first 5 rows and columns.
result <- cbind(dat4[, 1], dt4)
head(select(result, 1:5), 5)
##          X1 GSM25581.CEL.gz GSM25681.CEL.gz GSM136524.CEL.gz GSM136707.CEL.gz
## 1 1007_s_at       -5.753311       -6.368852        -5.952808        -5.613885
## 2   1053_at       -5.540006       -5.413417        -5.325963        -5.829139
## 3    117_at       -5.420924       -6.373643        -6.163945        -5.806505
## 4    121_at       -6.352589       -6.280611        -5.766674        -5.367892
## 5 1255_g_at       -5.303813       -5.810665        -5.795717        -5.576371

5

# 5
# Load the healthcare-spending table and reshape it to long format
# (one row per Location and Year).
# Needs readr (read_csv) and tidyr (gather) to be loaded already.
# The first two lines of the file are skipped before the header row is read.
dat5 <- read_csv("healthcare-spending.csv", skip = 2)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Location = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 12 parsing failures.
## row col   expected    actual                      file
##  53  -- 25 columns 1 columns 'healthcare-spending.csv'
##  54  -- 25 columns 1 columns 'healthcare-spending.csv'
##  55  -- 25 columns 1 columns 'healthcare-spending.csv'
##  56  -- 25 columns 1 columns 'healthcare-spending.csv'
##  57  -- 25 columns 1 columns 'healthcare-spending.csv'
## ... ... .......... ......... .........................
## See problems(...) for more details.
# Keep rows 2-52 only. The parsing failures recorded above start at row 53
# (single-column lines), so the trailing rows are not table data; row 1 is
# dropped as well (presumably a national total -- TODO confirm).
dat5 <- dat5[2:52, ]
# Replace the 25 column headers with "Location" plus the years 1991-2014.
names(dat5) <- c("Location", 1991:2014)
# Stack every year column into Year / HealthcareSpending pairs. Year holds the
# former column names, so it is a character column.
dt5 <- gather(dat5, Year, HealthcareSpending, -Location)
# Interactive scatter plot of spending against year, one colour per location.
# Uses the long-format table dt5 built above; needs ggplot2 and plotly loaded.
p5 <- ggplot(dt5, aes(x = Year, y = HealthcareSpending, color = Location)) +
  geom_point() +
  ggtitle("Scatter plot of healthcare spending versus time by states.") +
  xlab("Year") +
  ylab("Healthcare spending") +
  theme_minimal() +
  # Applied after theme_minimal() so these tweaks are not overwritten:
  # vertical year labels and a smaller font for the long location legend.
  theme(
    legend.text = element_text(size = 7),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
p5i <- ggplotly(p5)
p5i

6

# 6
# Interactive bar chart of each location's spending averaged over all years.
# Uses the wide table dat5 built earlier; needs dplyr, ggplot2 and plotly.
# Every column except the first (Location) is a year column, so the row mean
# of dat5[, -1] is the per-location average; a missing year value makes that
# location's average NA because rowMeans() is called without na.rm.
dat6 <- mutate(dat5, AverageHealthCost = rowMeans(dat5[, -1]))
p6 <- ggplot(dat6, aes(x = Location, y = AverageHealthCost)) +
  geom_col() +
  ggtitle("Barplot of average health care spending by state") +
  xlab("Location") +
  ylab("Average health care spending") +
  theme_minimal() +
  # Rotate the location names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
p6i <- ggplotly(p6)
p6i